import scrapy
from faker import Faker
from mztr.items import *
# from mztr.items import StrItem
# from mztr.items import FoodItem
# from mztr.message import Message
from mztr.message import *
import random
import time
import json
import sys
import requests as req
from lxml import etree
import math

class MztrSpider(scrapy.Spider):
	"""Scrape Meituan food-shop listings for a single city.

	Flow: pick one city link from the change-city page, open its
	``/meishi/`` listing, read the page-embedded JSON (via the project's
	``get_json`` helper) to learn the total shop count, then request every
	listing page and yield one ``MztrItem`` per shop.
	"""

	name = 'mztrs'
	allowed_domains = ['meituan.com']
	start_urls = ['https://www.meituan.com/changecity/']
	# Fixed desktop UA so Meituan serves the hidden-JSON page layout.
	header={
	"User-Agent":'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
	}

	# NOTE(review): class-level mutable attributes are shared across all
	# instances of this spider; only the disabled comment crawler wrote to
	# them, so they are kept for compatibility but currently unused.
	comments_list = []
	tags = []

	def parse(self, response):
		"""Pick one city (fixed index 1155) and request its food listing."""
		city_list = response.xpath("//div[@class='city-area']/span/a")
		# BUG FIX: the selected city was stored in `vh` while the undefined
		# name `pds` was dereferenced below, raising NameError on every run.
		city = city_list[1155]
		pds_url = "https:" + city.xpath("./@href").extract()[0] + "/meishi/"
		time.sleep(random.uniform(1, 3))  # crude politeness delay
		yield scrapy.Request(pds_url, headers=self.header, callback=self.parse_two,
			dont_filter=True, meta={'pds_url': pds_url})

	def parse_two(self, response):
		"""Read the total shop count, then request every listing page."""
		pds_url = response.url
		# Listing data is embedded in the 6th inline <script>; this index is
		# fragile if the page layout changes — TODO confirm on a live page.
		jx = response.xpath("//script/text()").extract()[5]
		hide_json = get_json(jx)
		all_shop_num = hide_json['poiLists']['totalCounts']
		# 15 shops per page, rounded up.
		all_page_num = int(math.ceil(float(all_shop_num) / 15))
		# BUG FIX: range(1, all_page_num) skipped the last page; pages run
		# pn1 .. pn{all_page_num} inclusive and parse_two yields no items
		# of its own, so the final page was silently lost.
		for page in range(1, all_page_num + 1):
			url = pds_url + "pn" + str(page) + "/"
			time.sleep(random.uniform(1, 3))
			yield scrapy.Request(url, headers=self.header, callback=self.parse_three, dont_filter=True)

	def parse_three(self, response):
		"""Yield one MztrItem per shop found on a single listing page."""
		html = response.xpath("//script/text()").extract()[5]
		hide_json = get_json(html)
		cityName = hide_json['$meta']['cityName']
		shop_list = hide_json['poiLists']['poiInfos']
		for shop in shop_list:
			mztrItem = MztrItem()
			mztrItem['shopId'] = shop['poiId']
			mztrItem['shopName'] = shop['title']
			mztrItem['avgScore'] = shop['avgScore']
			mztrItem['cityName'] = cityName
			mztrItem['allCommentNum'] = shop['allCommentNum']
			mztrItem['address'] = shop['address']
			mztrItem['avgPrice'] = shop['avgPrice']
			mztrItem['frontimg'] = shop['frontImg']
			yield mztrItem










# class MztrSpider(scrapy.Spider):
# 	name = 'mztr'
# 	allowed_domains = ['meituan.com']
# 	start_urls = ['https://www.meituan.com/changecity/']
# 	header01={
# 	'User-Agent':Faker(locale='zh_CN').chrome(),
# 	'Referer':'https://pds.meituan.com/'
# 	}

# 	page = 1

# 	# 获取珠海的链接，请求美食页面
# 	def parse(self,response):
# 		city_list = response.xpath("//div[@class='city-area']/span/a")
# 		pds = city_list[655]
# 		city_name = pds.xpath("./text()").extract()[0]
# 		pds_url = "https:"+pds.xpath("./@href").extract()[0]+"/meishi/"
# 		time.sleep(random.uniform(1,3))
# 		yield scrapy.Request(pds_url,callback=self.parse_two,dont_filter=True,headers=self.header01,meta={"city_name":city_name,'pds_url':pds_url})

# 	# 获取美食链接,请求菜单页面
# 	def parse_two(self,response):
# 		city_name = response.meta["city_name"]
# 		two_list = response.xpath("//*[@id='app']/section/div/div[2]/div[1]/div/div[1]/ul/li/a")
# 		two_list.pop(0)
# 		# 遍历每种菜单
# 		for two in two_list:
# 			two_name = two.xpath("./text()").extract()[0]
# 			url = two.xpath("./@href").extract()[0].split(":")
# 			two_url = url[0]+"s:"+url[1]

# 			time.sleep(random.uniform(1,3))
# 			yield scrapy.Request(two_url,
# 				callback=self.parse_three,
# 				headers={'User-Agent':Faker(locale='zh_CN').chrome(),
# 						'Referer':'https://pds.meituan.com/meishi/'},
# 				dont_filter=True,
# 				meta={"two_name":two_name,"city_name":city_name,'url':two_url})

# 	# 获取单个菜单的响应,请求每页
# 	def parse_three(self,response):
# 		if response.status==404:
# 			self.page = 1
# 			return
# 		city_name = response.meta['city_name']
# 		two_name = response.meta['two_name']
# 		url = response.meta['url']

# 		shop_list = response.xpath("//*[@id='app']/section/div/div[2]/div[2]/div[1]/ul/li")

# 		# test11 = response.xpath("//*[@id='app']/section/div/div[2]/div[2]/div[2]/ul/li")
# 		# print(response)
# 		# print(test11)
# 		# print(test11[-2].xpath("./span/text()").extract()[0])
# 		# print(type(test11))
# 		# sys.exit(0)
# 		# all_page = int(response.xpath("//*[@id='app']/section/div/div[2]/div[2]/div[2]/ul/li/span/text()").extract()[-1])

# 		# 对单页中每个餐厅遍历
# 		for shop in shop_list:
# 			item = MztrItem()

# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[2]/a
# 			item['shopId'] = shop.xpath("./div[2]/a/@href").extract()[0]
# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[1]/a/div[2]/img
# 			item['frontimg'] = shop.xpath("./div[1]/a/div[2]/img/@src").extract()[0]
# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[2]/a/h4
# 			item['shopName'] = shop.xpath("./div[@class='info']/a/h4/text()").extract()[0]
# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[2]/a/div/p/text()[1]
# 			item['avgScore'] = shop.xpath("./div[@class='info']/a/div/p/text()").extract()[1]
# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[2]/a/div/p/span/text()[1]
# 			item['allCommentNum'] = shop.xpath("./div[@class='info']/a/div/p/span/text()").extract()[1]
# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[2]/a/p/text()
# 			item['address'] = shop.xpath("./div[@class='info']/a/p/text()").extract()[0]
# 			# //*[@id="app"]/section/div/div[2]/div[2]/div[1]/ul/li[1]/div[2]/a/p/span/text()[2]
# 			item['avgPrice'] = shop.xpath("./div[2]/a/p/span/text()").extract()[2]
# 			item['cityName'] = city_name
# 			item['shopstStyle'] = two_name
# 			# time.sleep(random.uniform(1,3))
# 			yield item
# 		self.page+=1
# 		time.sleep(random.uniform(1,3))
# 		yield scrapy.Request(url+"pn"+str(self.page),
# 			callback=self.parse_three,
# 			headers={'User-Agent':Faker(locale='zh_CN').chrome()},
# 			dont_filter=True,
# 			meta={'url':url,'city_name':city_name,'two_name':two_name})


